In [1]:
# Instalar las versiones específicas de dash y werkzeug
# !pip install dash==2.14.2 werkzeug==2.2.3
In [2]:
# Importar librerías necesarias
import pandas as pd
import plotly.express as px
from pathlib import Path
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import numpy as np

# Ignorar advertencias de FutureWarning
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Load the USA housing dataset (the path is relative to the notebook's working
# directory) and preview the first two rows to confirm the columns parsed.
dataset_path = "../data/USAHousingDataset.csv"
df = pd.read_csv(dataset_path)
df.head(2)
Out[2]:
date price bedrooms bathrooms sqft_living sqft_lot floors waterfront view condition sqft_above sqft_basement yr_built yr_renovated street city statezip country
0 2014-05-09 00:00:00 376000.0 3.0 2.00 1340 1384 3.0 0 0 3 1340 0 2008 0 9245-9249 Fremont Ave N Seattle WA 98103 USA
1 2014-05-09 00:00:00 800000.0 4.0 3.25 3540 159430 2.0 0 0 3 3540 0 2007 0 33001 NE 24th St Carnation WA 98014 USA

Prueba primera imagen¶

In [3]:
# Set the default Plotly renderer.
# This lets the figures display correctly inside Jupyter notebooks.

import plotly.io as pio
pio.renderers.default = "notebook"
In [4]:
# Normalise the frame before plotting:
#   - strip stray whitespace from the column names,
#   - force the two plotted numeric columns to numbers (values that cannot be
#     parsed are coerced to NaN),
#   - label missing cities as "Unknown".
df.columns = df.columns.str.strip()
for numeric_col in ("price", "sqft_living"):
    df[numeric_col] = pd.to_numeric(df[numeric_col], errors="coerce")
df["city"] = df["city"].fillna("Unknown")
In [5]:
# Interactive scatter of price against living area, one colour per city.
axis_labels = {"sqft_living": "Área (pies cuadrados - sqft)", "price": "Precio"}
fig = px.scatter(
    data_frame=df,
    x="sqft_living",
    y="price",
    color="city",
    labels=axis_labels,
    title="Precio Vs. Pies Cuadrados por Ciudad",
    template="plotly_white",
)
In [6]:
# In-figure dropdown used to filter the scatter by city.
# The first entry ("Todas") switches every per-city trace back on.
n_city_traces = len(df["city"].unique())
show_all_button = {
    "label": "Todas",
    "method": "update",
    "args": [{"visible": [True] * n_city_traces}],
}
buttons = [show_all_button]
In [7]:
# Add one dropdown button per city (first test with the scatter figure).
# The unique-city array is computed once instead of once per city, and the
# unused enumerate index is gone.
# NOTE(review): the visibility mask assumes the figure holds one trace per city
# in first-appearance order (the same order as df["city"].unique()) — confirm if
# category_orders is ever passed to px.scatter.
city_order = df["city"].unique()
for city in city_order:
    buttons.append({
        "label": city,
        "method": "update",
        # Show only the trace that belongs to this city.
        "args": [{"visible": [other == city for other in city_order]}],
    })

fig.update_layout(
    updatemenus=[dict(active=0, buttons=buttons, x=1.1, y=1.1)]
)

fig.show()

Armando imágenes para el tablero¶

In [8]:
# Extra columns and lookup lists needed by the dashboard.
df["city"] = df["city"].fillna("Unknown")
# A living area of 0 is replaced by NaN so the ratio is NaN instead of inf.
living_area = df["sqft_living"].replace({0: np.nan})
df["price_per_sqft"] = df["price"] / living_area

# Dropdown options: the "all cities" entry first, then the cities sorted by name.
all_label = "Todas"
cities = sorted(df["city"].dropna().unique().astype(str))
options = [all_label, *cities]

# Global median house price, shown as the dashboard KPI.
median_global_full = float(df["price"].median())
median_global_full
Out[8]:
460000.0
In [9]:
# Per-city payload: one entry per dropdown option, holding the arrays each chart needs.
city_payload = {}

# Summary table: the ten cities with the most rows (ranked by the count "n"),
# together with each city's median price.
res_global = (
    df.groupby("city")
    .agg(n=("price", "count"), med=("price", "median"))
    .sort_values("n", ascending=False)
    .head(10)
    .reset_index()
)
table_header = list(res_global.columns)
table_cells = [res_global[c].tolist() for c in res_global.columns]

# Collect the chart data for every dropdown option.
for city in options:
    # "Todas" means no filter; any other option keeps only that city's rows.
    d = df.copy() if city == all_label else df[df['city'] == city]

    # Scatter of price against living area (missing values become empty strings).
    scatter_x = d['sqft_living'].fillna('').astype(object).tolist()
    scatter_y = d['price'].fillna('').astype(object).tolist()
    # Hover data: one [city, bedrooms] pair per point.
    custom = np.stack([d['city'].astype(str).tolist(), d.get('bedrooms', pd.Series([""]*len(d))).astype(str).tolist()], axis=1).tolist() if len(d) else [[],[]]

    # Price histogram.
    hist_x = d['price'].fillna('').astype(object).tolist()

    # Time series: median price per month-end bucket.
    if 'date' in d.columns and d['date'].notna().any():
        d_dates = d.dropna(subset=['date','price']).copy()
        d_dates['date'] = pd.to_datetime(d_dates['date'])
        ts = d_dates.set_index('date').resample('ME')['price'].median().reset_index()
        ts_x = ts['date'].tolist()
        ts_y = ts['price'].tolist()
    else:  # no usable dates for this selection
        ts_x, ts_y = [], []

    # Box plot of price by number of bedrooms.
    # Fill missing values BEFORE casting to str: the previous order
    # (astype(str) then fillna) turned NaN into the literal string 'nan' and
    # made the fillna a no-op.
    box_x = d['bedrooms'].fillna('').astype(str).tolist() if 'bedrooms' in d.columns else []
    box_y = d['price'].fillna('').astype(object).tolist()

    # Store everything under the option's label.
    city_payload[city] = {
        "scatter_x": scatter_x,
        "scatter_y": scatter_y,
        "customdata": custom,
        "hist_x": hist_x,
        "ts_x": ts_x,
        "ts_y": ts_y,
        "box_x": box_x,
        "box_y": box_y,
        # To get a per-city table, add its cells here:
        # "table_cells": [ ... ]
    }

display(res_global)
city n med
0 Seattle 1415 488000.0
1 Renton 261 345000.0
2 Bellevue 260 727016.0
3 Redmond 209 640000.0
4 Kent 167 283200.0
5 Kirkland 166 523500.0
6 Issaquah 162 561000.0
7 Auburn 162 268500.0
8 Sammamish 158 662500.0
9 Federal Way 131 263000.0
In [10]:
# Dashboard canvas: a 3x2 grid of cartesian cells, except the bottom-left cell,
# which is reserved for a table trace.
subplot_specs = [
    [{"type": "xy"}, {"type": "xy"}],
    [{"type": "xy"}, {"type": "xy"}],
    [{"type": "table"}, {"type": "xy"}],
]
subplot_titles = (
    "Precio Mediano de Vivienda en USA",
    "Precio Vs. Area",
    "Distribución de Precios",
    "Precio por Tiempo",
    "Top Ciudades con precios más altos",
    "Precio mediano Vs. Año de construcción",
)
fig = make_subplots(
    rows=3,
    cols=2,
    specs=subplot_specs,
    subplot_titles=subplot_titles,
    vertical_spacing=0.10,
    horizontal_spacing=0.10,
)
In [11]:
# KPI: global median sale price, drawn as a paper-anchored annotation.
# Plotly annotation text uses HTML-like markup, so the line break must be <br>
# ("\n" is not rendered as a new line).
annotation_kpi = dict(
    text=f"Precio mediano — General<br>${median_global_full:,.0f}",
    x=0.06, y=0.96, xref='paper', yref='paper', xanchor='left', yanchor='top',
    showarrow=False, font=dict(size=20, color="#0b3b59"), align="left"
)
# make_subplots stores the subplot titles as layout annotations.
# update_layout(annotations=[...]) merges the new dict into those existing
# annotations and overwrites the titles; add_annotation appends instead.
fig.add_annotation(**annotation_kpi)

fig.show()
In [12]:
# Initial scatter (all cities) placed in the top-right cell of the grid.
init = city_payload[all_label]
scatter_cell = dict(row=1, col=2)

scatter = go.Scattergl(
    mode='markers',
    x=init["scatter_x"],
    y=init["scatter_y"],
    customdata=init["customdata"],
    marker={"size": 6, "opacity": 0.7},
    # customdata rows are [city, bedrooms]; <extra></extra> hides the trace-name box.
    hovertemplate="City: %{customdata[0]}<br>Beds: %{customdata[1]}<br>Precio: %{y:$,.0f}<extra></extra>",
)
fig.add_trace(scatter, **scatter_cell)

fig.update_xaxes(title_text="Área (pies cuadrados)", **scatter_cell)
fig.update_yaxes(title_text="Precio (USD)", **scatter_cell)

fig.show()
In [13]:
# Price histogram (all cities) in the middle-left cell.
hist = go.Histogram(x=init["hist_x"], nbinsx=35, opacity=0.75)
fig.add_trace(hist, row=2, col=1)

fig.update_xaxes(title_text="Precio (USD)", row=2, col=1)
# Label the count axis too, matching the final dashboard version of this chart.
fig.update_yaxes(title_text="Cantidad de propiedades", row=2, col=1)

fig.show()
In [14]:
# Monthly median price over time (all cities) in the middle-right cell.
ts_cell = dict(row=2, col=2)

ts_line = go.Scatter(mode='lines+markers', x=init["ts_x"], y=init["ts_y"])
fig.add_trace(ts_line, **ts_cell)

fig.update_xaxes(title_text="Fecha de venta", **ts_cell)
fig.update_yaxes(title_text="Precio mediano (USD)", **ts_cell)

fig.show()
In [15]:
# Top-10 cities table in the bottom-left cell.
# NOTE(review): table_cells comes from res_global, which is ranked by number of
# rows per city rather than by price — confirm this matches the subplot title.
table_header_style = dict(
    values=["Ciudad", "Cantidad de propiedades", "Precio mediano (USD)"],
    fill_color="lightgray",
    font=dict(size=13, color="#0b3b59", family="Arial Black"),
    align="center",
)
table_cell_style = dict(
    values=table_cells,
    # Column formats: city as-is, count with thousands separator, price as dollars.
    format=[None, ",", "$,.0f"],
    align="center",
    font=dict(size=12),
)
table = go.Table(header=table_header_style, cells=table_cell_style)
fig.add_trace(table, row=3, col=1)

fig.show()
In [16]:
# Median price by construction year, drawn in the bottom-right cell.
# Rows whose yr_built is not positive are left out of the trend.
df_valid = df.loc[df['yr_built'] > 0].copy()
trend = df_valid.groupby("yr_built")["price"].median().reset_index()

year_cell = dict(row=3, col=2)

# Line chart: one point per construction year.
age_line = go.Scatter(
    mode="lines+markers",
    x=trend["yr_built"],
    y=trend["price"],
    line={"width": 2, "color": "#1f77b4"},
    marker={"size": 6},
    hovertemplate="Año: %{x}<br>Precio mediano: $%{y:,.0f}<extra></extra>",
)
fig.add_trace(age_line, **year_cell)

fig.update_xaxes(title_text="Año de construcción", **year_cell)
fig.update_yaxes(title_text="Precio mediano (USD)", **year_cell)

fig.show()

Ajustando tablero final¶

In [17]:
# Librerías necesarias
import pandas as pd
import numpy as np
from pathlib import Path
from plotly.subplots import make_subplots
import plotly.graph_objects as go


# Extra columns and lookup lists needed by the final dashboard.
df["city"] = df["city"].fillna("Unknown")
# A living area of 0 is replaced by NaN so the ratio is NaN instead of inf.
living_area = df["sqft_living"].replace({0: np.nan})
df["price_per_sqft"] = df["price"] / living_area

# Dropdown options: the "all cities" entry first, then the cities sorted by name.
all_label = "Todas"
cities = sorted(df["city"].dropna().unique().astype(str))
options = [all_label, *cities]

# Global median house price, shown as the dashboard KPI.
median_global_full = float(df["price"].median())

# Per-city payload, filled in by the loop that follows.
city_payload = {}

# Summary table: the ten cities with the most rows (ranked by the count "n"),
# together with each city's median price.
res_global = (
    df.groupby("city")
    .agg(n=("price", "count"), med=("price", "median"))
    .sort_values("n", ascending=False)
    .head(10)
    .reset_index()
)
table_header = list(res_global.columns)
table_cells = [res_global[c].tolist() for c in res_global.columns]

# Collect the chart data for every dropdown option.
for city in options:
    # "Todas" means no filter; any other option keeps only that city's rows.
    d = df.copy() if city == all_label else df[df['city'] == city]

    # Scatter of price against living area (missing values become empty strings).
    scatter_x = d['sqft_living'].fillna('').astype(object).tolist()
    scatter_y = d['price'].fillna('').astype(object).tolist()
    # Hover data: one [city, bedrooms] pair per point.
    custom = np.stack([d['city'].astype(str).tolist(), d.get('bedrooms', pd.Series([""]*len(d))).astype(str).tolist()], axis=1).tolist() if len(d) else [[],[]]

    # Price histogram.
    hist_x = d['price'].fillna('').astype(object).tolist()

    # Time series: median price per month-end bucket.
    if 'date' in d.columns and d['date'].notna().any():
        d_dates = d.dropna(subset=['date','price']).copy()
        d_dates['date'] = pd.to_datetime(d_dates['date'])
        ts = d_dates.set_index('date').resample('ME')['price'].median().reset_index()
        ts_x = ts['date'].tolist()
        ts_y = ts['price'].tolist()
    else:
        ts_x, ts_y = [], []

    # Box-plot data (price by number of bedrooms), computed for THIS selection.
    # Previously box_x/box_y were never assigned in this loop, so every entry
    # silently reused whatever values an earlier cell had left in the kernel
    # (or raised NameError on a kernel where that cell had not run).
    box_x = d['bedrooms'].fillna('').astype(str).tolist() if 'bedrooms' in d.columns else []
    box_y = d['price'].fillna('').astype(object).tolist()

    # Trend of median price by construction year (rows need a positive year and a price).
    d_year = d[(d['yr_built'].notna()) & (d['yr_built'] > 0) & (d['price'].notna())].copy()
    if len(d_year):
        trend_year = d_year.groupby("yr_built")["price"].median().reset_index().sort_values("yr_built")
        yr_x = trend_year['yr_built'].tolist()
        yr_y = trend_year['price'].tolist()
    else:
        yr_x, yr_y = [], []

    # Store everything under the option's label.
    city_payload[city] = {
        "scatter_x": scatter_x,
        "scatter_y": scatter_y,
        "customdata": custom,
        "hist_x": hist_x,
        "ts_x": ts_x,
        "ts_y": ts_y,
        "box_x": box_x,
        "box_y": box_y,
        "yr_x": yr_x,
        "yr_y": yr_y
    }

# Dashboard canvas: a 3x2 grid. The top-left cell gets no trace (the KPI
# annotation sits over it) and the bottom-left cell holds a table.
fig = make_subplots(
    rows=3, cols=2,
    specs=[
        [{"type": "xy"}, {"type": "xy"}],
        [{"type": "xy"}, {"type": "xy"}],
        [{"type": "table"}, {"type": "xy"}]
    ],
    subplot_titles=(
        "Precio Mediano de Vivienda en USA",
        "Precio Vs. Area",
        "Distribución de Precios",
        "Precio por Tiempo",
        "Top Ciudades con precios más altos",
        "Precio mediano Vs. Año de construcción"
    ),
    vertical_spacing=0.10,
    horizontal_spacing=0.10
)

# make_subplots stores the subplot titles as layout annotations. Snapshot them
# as plain dicts now: every later annotation update (the initial layout and
# each dropdown button) has to send them again, otherwise they are overwritten
# (update_layout merges into existing annotations) or dropped (a button's
# relayout replaces the whole annotation array).
title_annotations = [ann.to_plotly_json() for ann in fig.layout.annotations]

# Static caption drawn next to the dropdown.
filter_label_annotation = dict(
    text="<b>Filtro por ciudad:</b>",
    x=0.83, y=1.055,
    xref="paper", yref="paper",
    xanchor="right", yanchor="top",
    showarrow=False,
    font=dict(size=14, color="#0b3b59"),
    align="right"
)


def kpi_annotation(scope_label, median_price):
    """Return the KPI annotation dict (top-left of the figure).

    scope_label  -- text shown after "Precio mediano —" ("General" or a city name).
    median_price -- median price to display, formatted as whole dollars.
    """
    return dict(
        text=f"<b>Precio mediano — {scope_label}</b><br><span style='font-size:26px;'>${median_price:,.0f}</span>",
        x=0.03, y=0.96, xref='paper', yref='paper', xanchor='left', yanchor='top',
        showarrow=False, font=dict(size=20, color="#0b3b59"), align="left"
    )


def dashboard_annotations(scope_label, median_price):
    """Return the full annotation list: subplot titles + KPI + filter caption."""
    return title_annotations + [
        kpi_annotation(scope_label, median_price),
        filter_label_annotation,
    ]


# KPI for the initial (all cities) view. Assigning the property replaces the
# annotation list as a whole, unlike update_layout(annotations=...).
annotation_kpi = kpi_annotation("General", median_global_full)
fig.layout.annotations = dashboard_annotations("General", median_global_full)

# Initial scatter (all cities)
init = city_payload[all_label]
scatter = go.Scattergl(
    x=init["scatter_x"], y=init["scatter_y"], mode='markers',
    marker=dict(size=6, opacity=0.7),
    customdata=init["customdata"],
    hovertemplate="City: %{customdata[0]}<br>Beds: %{customdata[1]}<br>Precio: %{y:$,.0f}<extra></extra>"
)
fig.add_trace(scatter, row=1, col=2)

fig.update_xaxes(title_text="Área (pies cuadrados)", row=1, col=2)
fig.update_yaxes(title_text="Precio (USD)", row=1, col=2)

# Price histogram
hist = go.Histogram(x=init["hist_x"], nbinsx=35, opacity=0.75)
fig.add_trace(hist, row=2, col=1)

fig.update_xaxes(title_text="Precio (USD)", row=2, col=1)
fig.update_yaxes(title_text="Cantidad de propiedades", row=2, col=1)

# Monthly median price time series
ts_line = go.Scatter(x=init["ts_x"], y=init["ts_y"], mode='lines+markers')
fig.add_trace(ts_line, row=2, col=2)

fig.update_xaxes(title_text="Fecha de venta", row=2, col=2)
fig.update_yaxes(title_text="Precio mediano (USD)", row=2, col=2)

# Top-10 cities table
# NOTE(review): table_cells is ranked by number of rows per city, not by price —
# confirm this matches the "precios más altos" subplot title.
table = go.Table(
    header=dict(
        values=["Ciudad", "Cantidad de propiedades", "Precio mediano (USD)"],
        fill_color="lightgray",
        font=dict(size=13, color="#0b3b59", family="Arial Black"),
        align="center"
    ),
    cells=dict(
        values=table_cells,
        format=[None, ",", "$,.0f"],
        align="center",
        font=dict(size=12)
    )
)
fig.add_trace(table, row=3, col=1)

# Median price by construction year
age_line = go.Scatter(
    x=init["yr_x"],
    y=init["yr_y"],
    mode="lines+markers",
    marker=dict(size=6),
    line=dict(width=2, color="#1f77b4"),
    hovertemplate="Año: %{x}<br>Precio mediano: $%{y:,.0f}<extra></extra>"
)

fig.add_trace(age_line, row=3, col=2)

fig.update_xaxes(title_text="Año de construcción", row=3, col=2)
fig.update_yaxes(title_text="Precio mediano (USD)", row=3, col=2)

# Dropdown buttons that swap the chart data per city
buttons = []
total_traces = len(fig.data)

# Trace positions in fig.data, i.e. the order of the add_trace calls above.
SCATTER_IDX, HIST_IDX, TS_IDX, TABLE_IDX, YEAR_IDX = range(5)


def build_update_args_for_city(payload):
    """Build the restyle dict that loads one city's data into the traces.

    payload -- one city_payload entry (keys scatter_x, scatter_y, customdata,
               hist_x, ts_x, ts_y, yr_x, yr_y).

    Returns a dict with one list per restyled attribute; each list has one
    slot per trace in the figure (None where the attribute is not set here).
    Reads the module-level total_traces and table_cells.
    """
    x_list = [None] * total_traces
    y_list = [None] * total_traces
    custom_list = [None] * total_traces
    cells_list = [None] * total_traces

    # scatter
    x_list[SCATTER_IDX] = payload["scatter_x"]
    y_list[SCATTER_IDX] = payload["scatter_y"]
    custom_list[SCATTER_IDX] = payload["customdata"]

    # histogram (x only)
    x_list[HIST_IDX] = payload["hist_x"]

    # time series
    x_list[TS_IDX] = payload["ts_x"]
    y_list[TS_IDX] = payload["ts_y"]

    # table: the same global top-10 cells for every city
    cells_list[TABLE_IDX] = table_cells

    # construction-year trend
    x_list[YEAR_IDX] = payload["yr_x"]
    y_list[YEAR_IDX] = payload["yr_y"]

    return {"x": x_list, "y": y_list, "customdata": custom_list, "cells.values": cells_list}


# Median price per city, computed once instead of filtering df for every button.
city_medians = df.groupby("city")["price"].median()

# Create one button per option
for city in options:
    payload = city_payload[city]
    args0 = build_update_args_for_city(payload)

    if city == all_label:
        scope_label, med = "General", median_global_full
    else:
        scope_label, med = city, float(city_medians.get(city, np.nan))

    # A button's layout update replaces the whole annotation array, so send
    # the subplot titles and the filter caption along with the new KPI.
    args1 = {"annotations": dashboard_annotations(scope_label, med)}

    buttons.append(dict(label=city, method="update", args=[args0, args1]))

# Final layout: dropdown, size, title and theme. The annotations were already
# set above and are deliberately not passed again here.
fig.update_layout(
    updatemenus=[dict(
        active=0, buttons=buttons,
        x=0.98, y=1.06,
        xanchor="right", yanchor="top",
        showactive=True
    )],

    autosize=False, width=1200, height=920,
    title=dict(
        text="<b><span style='color:#0b3b59;'>🇺🇸 DASHBOARD – USA HOUSING 🇺🇸</span></b>",
        x=0.5, xanchor="center", font=dict(size=24)
    ),
    showlegend=False,
    template="plotly_white",
    margin=dict(t=120, l=60, r=120, b=40)
)

# Show the dashboard
fig.show()

Incluyendo ingreso de variables para predecir¶

In [18]:
from IPython.display import display, HTML
import json

# Render the Plotly figure as an embeddable HTML fragment; plotly.js is
# referenced from the CDN rather than inlined.
html_plot = fig.to_html(full_html=False, include_plotlyjs='cdn')

# Input fields of the prediction form, split by widget type.
numeric_fields = [
    "bedrooms","bathrooms","sqft_living","sqft_lot",
    "floors","waterfront","view","condition","sqft_above",
    "sqft_basement","yr_built","yr_renovated"
]
string_fields = ["street","statezip"]

# One labelled <input> row per field: numeric fields first, then text fields.
numeric_rows = [
    (
        f"<div class='field-row'>"
        f"<label for='{f}' class='lbl'>{f}</label>"
        f"<input type='number' id='{f}' class='inp num' />"
        f"</div>"
    )
    for f in numeric_fields
]
text_rows = [
    (
        f"<div class='field-row'>"
        f"<label for='{f}' class='lbl'>{f}</label>"
        f"<input type='text' id='{f}' class='inp txt' />"
        f"</div>"
    )
    for f in string_fields
]
inputs_html = "".join(numeric_rows + text_rows)

# JS statements that copy each input into the `payload` object:
# numeric inputs become Number (or null when empty), text inputs default to ''.
js_lines = [
    f"payload['{f}'] = document.getElementById('{f}').value ? Number(document.getElementById('{f}').value) : null;"
    for f in numeric_fields
]
js_lines += [
    f"payload['{f}'] = document.getElementById('{f}').value || '';"
    for f in string_fields
]
js_payload = "\n        ".join(js_lines)

# Placeholder prediction: the global median price.
median_mock = float(df['price'].median())

# Build the full HTML: the Plotly dashboard on the left and the prediction form
# on the right, styled to resemble the original dashboard.
# Two fixes to the embedded script:
#   - the mock prediction is serialised with json.dumps, so a NaN median becomes
#     the valid JS literal NaN instead of Python's repr `nan`;
#   - the result uses a fixed en-US format with no decimals, so it matches the
#     dashboard KPI format regardless of the browser locale.
html = f"""
<style>
/* Layout */
.dashboard-wrap {{ display:flex; gap:24px; font-family: 'Trebuchet MS', Arial, sans-serif; align-items:flex-start; }}
.left-panel {{ flex: 3; min-width: 820px; }}
.right-panel {{
    flex: 1;
    max-width: 360px;
    background: #fafafa;
    border-left: 8px solid #222;
    padding:18px;
    border-radius:6px;
    height:884px;
    overflow:auto;
    box-shadow: 0 2px 6px rgba(0,0,0,0.03);
}}

/* Header / KPI area (aplica dentro del plotly) */
.header-title {{ text-align:center; font-weight:700; color:#0b3b59; font-size:24px; margin-bottom:6px; }}

/* Form styling */
.field-row {{ margin-bottom:8px; display:flex; flex-direction:column; }}
.lbl {{ font-size:12px; color:#7d868f; margin-bottom:4px; text-transform:lowercase; }}
.inp {{
    padding:2px 4px;
    height: 24px;
    border:1px solid #cfcfcf;
    border-radius:4px;
    outline:none;
    font-size:13px;
    box-sizing:border-box;
}}
.inp:focus {{ border-color:#7fbf7f; box-shadow: 0 0 0 3px rgba(127,191,127,0.08); }}

/* Numeric narrower inputs */
.inp.num {{ width:100%; }}

/* Button */
.btn-predict {{
    background:#2ca02c; color:white; border:none; padding:10px 14px; width:100%; border-radius:6px;
    font-weight:600; font-size:14px; cursor:pointer; margin-top:10px;
}}
.btn-predict:hover {{ opacity:0.95; }}

/* Resultado box */
.result-box {{
    margin-top:12px;
    padding:6px 10px;
    background:white;
    border:1px solid #dcdcdc;
    border-radius:6px;
    font-size:14px;
    color:#0b3b59;
    height:auto;
    min-height: auto;
    line-height: 1.2;
}}

/* Sección labels */
.side-title {{ font-weight:700; color:#0b3b59; margin-bottom:6px; font-size:16px; }}
.side-sub {{ color:#7d868f; font-size:12px; margin-bottom:10px; }}

/* Ajustes responsive */
@media (max-width: 1200px) {{
    .left-panel {{ min-width: 680px; }}
    .right-panel {{ max-width:320px; }}
}}
</style>

<div class="dashboard-wrap">
  <div class="left-panel">
    <!-- aquí se inserta la visualización de plotly -->
    {html_plot}
  </div>

  <div class="right-panel">
    <div class="side-title">Predicción</div>
    <div class="side-sub">Ingresa los valores y genera la predicción</div>

    <!-- inputs -->
    <div id="inputs-area">
      {inputs_html}
    </div>

    <button class="btn-predict" onclick="generarPrediccion()">Generar predicción</button>

    <hr style="margin:12px 0;">
    <div style="font-size:13px; color:#7d868f; margin-bottom:6px;">Resultado</div>
    <div id="resultado" class="result-box"><i>Aún no se ha generado predicción.</i></div>
  </div>
</div>

<script>
function generarPrediccion() {{
    let payload = {{}};

    {js_payload}

    // Guardar globalmente para inspeccionar con window.payload desde la consola
    window.payload = payload;

    // Validaciones básicas: ejemplo, convertir NaN a null y chequeo simple
    for (let k in payload) {{
        if (typeof payload[k] === 'number' && isNaN(payload[k])) payload[k] = null;
    }}

    // Mock de predicción precio
    let pred = {json.dumps(median_mock)};

    // Mostrar resultado con formato similar a dashboard
    document.getElementById('resultado').innerHTML = "<div style='font-weight:700;color:#0b3b59;font-size:16px;'>$" + pred.toLocaleString('en-US', {{ maximumFractionDigits: 0 }})
    + "</div><div style='color:#6b6f75;font-size:12px;margin-top:6px;'></div>";

    console.log("Payload (listo para backend):", payload);
}}
</script>
"""

display(HTML(html))
Predicción
Ingresa los valores y genera la predicción

Resultado
Aún no se ha generado predicción.
In [ ]: